# HG changeset patch
# User iuc
# Date 1725465001 0
# Node ID ae2aad0a6d508cf8d2c549dd96e8193f3f8a6e4a
# Parent 5bf899c1397937b7b6741bfcb046a1d09f2ad0ea
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 4b17306763bc8eb92c8967c7b4b57abcc514e670
diff -r 5bf899c13979 -r ae2aad0a6d50 edger.R
--- a/edger.R Wed Nov 22 03:57:37 2023 +0000
+++ b/edger.R Wed Sep 04 15:50:01 2024 +0000
@@ -42,8 +42,8 @@
# setup R error handling to go to stderr
options(show.error.messages = FALSE, error = function() {
- cat(geterrmessage(), file = stderr())
- q("no", 1, FALSE)
+ cat(geterrmessage(), file = stderr())
+ q("no", 1, FALSE)
})
# we need that to not crash galaxy with an UTF8 error on German LC settings.
@@ -64,41 +64,41 @@
# Function to sanitise contrast equations so there are no whitespaces
# surrounding the arithmetic operators, leading or trailing whitespace
sanitise_equation <- function(equation) {
- equation <- gsub(" *[+] *", "+", equation)
- equation <- gsub(" *[-] *", "-", equation)
- equation <- gsub(" *[/] *", "/", equation)
- equation <- gsub(" *[*] *", "*", equation)
- equation <- gsub("^\\s+|\\s+$", "", equation)
- return(equation)
+ equation <- gsub(" *[+] *", "+", equation)
+ equation <- gsub(" *[-] *", "-", equation)
+ equation <- gsub(" *[/] *", "/", equation)
+ equation <- gsub(" *[*] *", "*", equation)
+ equation <- gsub("^\\s+|\\s+$", "", equation)
+ return(equation)
}
# Function to sanitise group information
sanitise_groups <- function(string) {
- string <- gsub(" *[,] *", ",", string)
- string <- gsub("^\\s+|\\s+$", "", string)
- return(string)
+ string <- gsub(" *[,] *", ",", string)
+ string <- gsub("^\\s+|\\s+$", "", string)
+ return(string)
}
# Function to change periods to whitespace in a string
unmake_names <- function(string) {
- string <- gsub(".", " ", string, fixed = TRUE)
- return(string)
+ string <- gsub(".", " ", string, fixed = TRUE)
+ return(string)
}
# Sanitise file base names coming from factors or contrasts
sanitise_basename <- function(string) {
- string <- gsub("[/^]", "_", string)
- return(string)
+ string <- gsub("[/^]", "_", string)
+ return(string)
}
# Generate output folder and paths
make_out <- function(filename) {
- return(paste0(out_path, "/", filename))
+ return(paste0(out_path, "/", filename))
}
# Generating design information
paste_listname <- function(string) {
- return(paste0("factors$", string))
+ return(paste0("factors$", string))
}
# Create cata function: default path set, default seperator empty and appending
@@ -106,49 +106,49 @@
# defaults)
cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
append = TRUE) {
- if (is.character(file)) {
- if (file == "") {
- file <- stdout()
- } else if (substring(file, 1L, 1L) == "|") {
- file <- pipe(substring(file, 2L), "w")
- on.exit(close(file))
- } else {
- file <- file(file, ifelse(append, "a", "w"))
- on.exit(close(file))
+ if (is.character(file)) {
+ if (file == "") {
+ file <- stdout()
+ } else if (substring(file, 1L, 1L) == "|") {
+ file <- pipe(substring(file, 2L), "w")
+ on.exit(close(file))
+ } else {
+ file <- file(file, ifelse(append, "a", "w"))
+ on.exit(close(file))
+ }
}
- }
- .Internal(cat(list(...), file, sep, fill, labels, append))
+ .Internal(cat(list(...), file, sep, fill, labels, append))
}
# Function to write code for html head and title
html_head <- function(title) {
- cata("
\n")
- cata("", title, "\n")
- cata("\n")
+ cata("\n")
+ cata("", title, "\n")
+ cata("\n")
}
# Function to write code for html links
html_link <- function(address, label = address) {
- cata("", label, "
\n")
+ cata("", label, "
\n")
}
# Function to write code for html images
html_image <- function(source, label = source, height = 600, width = 600) {
- cata("\n")
+ cata("\n")
}
# Function to write code for html list items
list_item <- function(...) {
- cata("", ..., "\n")
+ cata("", ..., "\n")
}
table_item <- function(...) {
- cata("", ..., " | \n")
+ cata("", ..., " | \n")
}
table_head_item <- function(...) {
- cata("", ..., " | \n")
+ cata("", ..., " | \n")
}
################################################################################
@@ -160,160 +160,161 @@
# Get options, using the spec as defined by the enclosed list.
# Read the options from the default: commandArgs(TRUE).
-spec <- matrix(c(
- "htmlPath", "R", 1, "character",
- "outPath", "o", 1, "character",
- "filesPath", "j", 2, "character",
- "matrixPath", "m", 2, "character",
- "factFile", "f", 2, "character",
- "formula", "F", 2, "character",
- "factInput", "i", 2, "character",
- "annoPath", "a", 2, "character",
- "contrastData", "C", 1, "character",
- "cpmReq", "c", 1, "double",
- "totReq", "y", 0, "logical",
- "cntReq", "z", 1, "integer",
- "sampleReq", "s", 1, "integer",
- "normCounts", "x", 0, "logical",
- "rdaOpt", "r", 0, "logical",
- "lfcReq", "l", 1, "double",
- "pValReq", "p", 1, "double",
- "pAdjOpt", "d", 1, "character",
- "normOpt", "n", 1, "character",
- "robOpt", "b", 0, "logical",
- "lrtOpt", "t", 0, "logical"
-),
-byrow = TRUE, ncol = 4
+spec <- matrix(
+ c(
+ "htmlPath", "R", 1, "character",
+ "outPath", "o", 1, "character",
+ "filesPath", "j", 2, "character",
+ "matrixPath", "m", 2, "character",
+ "factFile", "f", 2, "character",
+ "formula", "F", 2, "character",
+ "factInput", "i", 2, "character",
+ "annoPath", "a", 2, "character",
+ "contrastData", "C", 1, "character",
+ "cpmReq", "c", 1, "double",
+ "totReq", "y", 0, "logical",
+ "cntReq", "z", 1, "integer",
+ "sampleReq", "s", 1, "integer",
+ "normCounts", "x", 0, "logical",
+ "rdaOpt", "r", 0, "logical",
+ "lfcReq", "l", 1, "double",
+ "pValReq", "p", 1, "double",
+ "pAdjOpt", "d", 1, "character",
+ "normOpt", "n", 1, "character",
+ "robOpt", "b", 0, "logical",
+ "lrtOpt", "t", 0, "logical"
+ ),
+ byrow = TRUE, ncol = 4
)
opt <- getopt(spec)
if (is.null(opt$matrixPath) && is.null(opt$filesPath)) {
- cat("A counts matrix (or a set of counts files) is required.\n")
- q(status = 1)
+ cat("A counts matrix (or a set of counts files) is required.\n")
+ q(status = 1)
}
if (is.null(opt$cpmReq)) {
- filt_cpm <- FALSE
+ filt_cpm <- FALSE
} else {
- filt_cpm <- TRUE
+ filt_cpm <- TRUE
}
if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
- filt_smpcount <- FALSE
+ filt_smpcount <- FALSE
} else {
- filt_smpcount <- TRUE
+ filt_smpcount <- TRUE
}
if (is.null(opt$totReq)) {
- filt_totcount <- FALSE
+ filt_totcount <- FALSE
} else {
- filt_totcount <- TRUE
+ filt_totcount <- TRUE
}
if (is.null(opt$lrtOpt)) {
- want_lrt <- FALSE
+ want_lrt <- FALSE
} else {
- want_lrt <- TRUE
+ want_lrt <- TRUE
}
if (is.null(opt$rdaOpt)) {
- want_rda <- FALSE
+ want_rda <- FALSE
} else {
- want_rda <- TRUE
+ want_rda <- TRUE
}
if (is.null(opt$annoPath)) {
- have_anno <- FALSE
+ have_anno <- FALSE
} else {
- have_anno <- TRUE
+ have_anno <- TRUE
}
if (is.null(opt$normCounts)) {
- want_norm <- FALSE
+ want_norm <- FALSE
} else {
- want_norm <- TRUE
+ want_norm <- TRUE
}
if (is.null(opt$robOpt)) {
- want_robust <- FALSE
+ want_robust <- FALSE
} else {
- want_robust <- TRUE
+ want_robust <- TRUE
}
if (!is.null(opt$filesPath)) {
- # Process the separate count files (adapted from DESeq2 wrapper)
- library("rjson")
- parser <- newJSONParser()
- parser$addData(opt$filesPath)
- factor_list <- parser$getObject()
- factors <- sapply(factor_list, function(x) x[[1]])
- filenames_in <- unname(unlist(factor_list[[1]][[2]]))
- sampletable <- data.frame(
- sample = basename(filenames_in),
- filename = filenames_in,
- row.names = filenames_in,
- stringsAsFactors = FALSE
- )
- for (factor in factor_list) {
- factorname <- factor[[1]]
- sampletable[[factorname]] <- character(nrow(sampletable))
- lvls <- sapply(factor[[2]], function(x) names(x))
- for (i in seq_along(factor[[2]])) {
- files <- factor[[2]][[i]][[1]]
- sampletable[files, factorname] <- lvls[i]
+ # Process the separate count files (adapted from DESeq2 wrapper)
+ library("rjson")
+ parser <- newJSONParser()
+ parser$addData(opt$filesPath)
+ factor_list <- parser$getObject()
+ factors <- sapply(factor_list, function(x) x[[1]])
+ filenames_in <- unname(unlist(factor_list[[1]][[2]]))
+ sampletable <- data.frame(
+ sample = basename(filenames_in),
+ filename = filenames_in,
+ row.names = filenames_in,
+ stringsAsFactors = FALSE
+ )
+ for (factor in factor_list) {
+ factorname <- factor[[1]]
+ sampletable[[factorname]] <- character(nrow(sampletable))
+ lvls <- sapply(factor[[2]], function(x) names(x))
+ for (i in seq_along(factor[[2]])) {
+ files <- factor[[2]][[i]][[1]]
+ sampletable[files, factorname] <- lvls[i]
+ }
+ sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
}
- sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
- }
- rownames(sampletable) <- sampletable$sample
- rem <- c("sample", "filename")
- factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
+ rownames(sampletable) <- sampletable$sample
+ rem <- c("sample", "filename")
+ factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
- # read in count files and create single table
- countfiles <- lapply(sampletable$filename, function(x) {
- read.delim(x, row.names = 1)
- })
- counts <- do.call("cbind", countfiles)
+ # read in count files and create single table
+ countfiles <- lapply(sampletable$filename, function(x) {
+ read.delim(x, row.names = 1)
+ })
+ counts <- do.call("cbind", countfiles)
} else {
- # Process the single count matrix
- counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
- row.names(counts) <- counts[, 1]
- counts <- counts[, -1]
- countsrows <- nrow(counts)
+ # Process the single count matrix
+ counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
+ row.names(counts) <- counts[, 1]
+ counts <- counts[, -1]
+ countsrows <- nrow(counts)
- # Process factors
- if (is.null(opt$factInput)) {
- factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE)
- # check samples names match
- if (!any(factordata[, 1] %in% colnames(counts))) {
- stop("Sample IDs in factors file and count matrix don't match")
+ # Process factors
+ if (is.null(opt$factInput)) {
+ factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE)
+ # check samples names match
+ if (!any(factordata[, 1] %in% colnames(counts))) {
+ stop("Sample IDs in factors file and count matrix don't match")
+ }
+ # order samples as in counts matrix
+ factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
+ factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names))
+ } else {
+ factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
+ factordata <- list()
+ for (fact in factors) {
+ newfact <- unlist(strsplit(fact, split = "::"))
+ factordata <- rbind(factordata, newfact)
+ } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+
+ # Set the row names to be the name of the factor and delete first row
+ row.names(factordata) <- factordata[, 1]
+ factordata <- factordata[, -1]
+ factordata <- sapply(factordata, sanitise_groups)
+ factordata <- sapply(factordata, strsplit, split = ",")
+ factordata <- sapply(factordata, make.names)
+ # Transform factor data into data frame of R factor objects
+ factors <- data.frame(factordata)
}
- # order samples as in counts matrix
- factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
- factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names))
- } else {
- factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
- factordata <- list()
- for (fact in factors) {
- newfact <- unlist(strsplit(fact, split = "::"))
- factordata <- rbind(factordata, newfact)
- } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
-
- # Set the row names to be the name of the factor and delete first row
- row.names(factordata) <- factordata[, 1]
- factordata <- factordata[, -1]
- factordata <- sapply(factordata, sanitise_groups)
- factordata <- sapply(factordata, strsplit, split = ",")
- factordata <- sapply(factordata, make.names)
- # Transform factor data into data frame of R factor objects
- factors <- data.frame(factordata)
- }
}
# if annotation file provided
if (have_anno) {
- geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
+ geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
}
# Create output directory
@@ -322,10 +323,10 @@
# Check if contrastData is a file or not
if (file.exists(opt$contrastData)) {
- contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]])
+ contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]])
} else {
- # Split up contrasts separated by comma into a vector then sanitise
- contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
+ # Split up contrasts separated by comma into a vector then sanitise
+ contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
}
contrast_data <- sanitise_equation(contrast_data)
contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE)
@@ -337,16 +338,16 @@
mds_pdf <- character() # Initialise character vector
mds_png <- character()
for (i in seq_len(ncol(factors))) {
- mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
- mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
+ mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
+ mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
}
md_pdf <- character()
md_png <- character()
top_out <- character()
for (i in seq_along(contrast_data)) {
- md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
- md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
- top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
+ md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
+ md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
+ top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
} # Save output paths for each contrast as vectors
norm_out <- make_out("edgeR_normcounts.tsv")
rda_out <- make_out("edgeR_analysis.RData")
@@ -370,11 +371,11 @@
data <- list()
data$counts <- counts
if (have_anno) {
- # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
- annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
- data$genes <- annoord
+ # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+ annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
+ data$genes <- annoord
} else {
- data$genes <- data.frame(GeneID = row.names(counts))
+ data$genes <- data.frame(GeneID = row.names(counts))
}
# If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
@@ -382,16 +383,16 @@
prefilter_count <- nrow(data$counts)
if (filt_cpm || filt_smpcount || filt_totcount) {
- if (filt_totcount) {
- keep <- rowSums(data$counts) >= opt$cntReq
- } else if (filt_smpcount) {
- keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
- } else if (filt_cpm) {
- keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
- }
+ if (filt_totcount) {
+ keep <- rowSums(data$counts) >= opt$cntReq
+ } else if (filt_smpcount) {
+ keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
+ } else if (filt_cpm) {
+ keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+ }
- data$counts <- data$counts[keep, ]
- data$genes <- data$genes[keep, , drop = FALSE]
+ data$counts <- data$counts[keep, ]
+ data$genes <- data$genes[keep, , drop = FALSE]
}
postfilter_count <- nrow(data$counts)
@@ -411,32 +412,32 @@
if (!is.null(opt$formula)) {
- formula <- opt$formula
- # sanitisation can be getting rid of the "~"
- if (!startsWith(formula, "~")) {
- formula <- paste0("~", formula)
- }
+ formula <- opt$formula
+ # sanitisation can be getting rid of the "~"
+ if (!startsWith(formula, "~")) {
+ formula <- paste0("~", formula)
+ }
} else {
- formula <- "~0"
- for (i in seq_along(factor_list)) {
- formula <- paste(formula, factor_list[i], sep = "+")
- }
+ formula <- "~0"
+ for (i in seq_along(factor_list)) {
+ formula <- paste(formula, factor_list[i], sep = "+")
+ }
}
formula <- formula(formula)
design <- model.matrix(formula, factors)
for (i in seq_along(factor_list)) {
- colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
+ colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
}
# Calculating normalising factor, estimating dispersion
data <- calcNormFactors(data, method = opt$normOpt)
if (want_robust) {
- data <- estimateDisp(data, design = design, robust = TRUE)
+ data <- estimateDisp(data, design = design, robust = TRUE)
} else {
- data <- estimateDisp(data, design = design)
+ data <- estimateDisp(data, design = design)
}
# Generate contrasts information
@@ -466,21 +467,21 @@
# If additional factors create additional MDS plots coloured by factor
if (ncol(factors) > 1) {
- for (i in 2:ncol(factors)) {
- png(mds_png[i], width = 600, height = 600)
- plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
- img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
- img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
- image_data <- rbind(image_data, c(img_name, img_addr))
- invisible(dev.off())
+ for (i in 2:ncol(factors)) {
+ png(mds_png[i], width = 600, height = 600)
+ plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
+ img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
+ img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
+ image_data <- rbind(image_data, c(img_name, img_addr))
+ invisible(dev.off())
- pdf(mds_pdf[i])
- plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
- link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
- link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
- link_data <- rbind(link_data, c(link_name, link_addr))
- invisible(dev.off())
- }
+ pdf(mds_pdf[i])
+ plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
+ link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
+ link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
+ link_data <- rbind(link_data, c(link_name, link_addr))
+ invisible(dev.off())
+ }
}
# BCV Plot
@@ -500,111 +501,111 @@
# Generate fit
if (want_lrt) {
- fit <- glmFit(data, design)
+ fit <- glmFit(data, design)
} else {
- if (want_robust) {
- fit <- glmQLFit(data, design, robust = TRUE)
- } else {
- fit <- glmQLFit(data, design)
- }
+ if (want_robust) {
+ fit <- glmQLFit(data, design, robust = TRUE)
+ } else {
+ fit <- glmQLFit(data, design)
+ }
- # Plot QL dispersions
- png(ql_png, width = 600, height = 600)
- plotQLDisp(fit, main = "QL Plot")
- img_name <- "QL Plot"
- img_addr <- "qlplot.png"
- image_data <- rbind(image_data, c(img_name, img_addr))
- invisible(dev.off())
+ # Plot QL dispersions
+ png(ql_png, width = 600, height = 600)
+ plotQLDisp(fit, main = "QL Plot")
+ img_name <- "QL Plot"
+ img_addr <- "qlplot.png"
+ image_data <- rbind(image_data, c(img_name, img_addr))
+ invisible(dev.off())
- pdf(ql_pdf)
- plotQLDisp(fit, main = "QL Plot")
- link_name <- "QL Plot.pdf"
- link_addr <- "qlplot.pdf"
- link_data <- rbind(link_data, c(link_name, link_addr))
- invisible(dev.off())
+ pdf(ql_pdf)
+ plotQLDisp(fit, main = "QL Plot")
+ link_name <- "QL Plot.pdf"
+ link_addr <- "qlplot.pdf"
+ link_data <- rbind(link_data, c(link_name, link_addr))
+ invisible(dev.off())
}
# Save normalised counts (log2cpm)
if (want_norm) {
- normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
- normalised_counts <- data.frame(data$genes, normalised_counts)
- write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
- link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
+ normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
+ normalised_counts <- data.frame(data$genes, normalised_counts)
+ write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
+ link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
}
for (i in seq_along(contrast_data)) {
- if (want_lrt) {
- res <- glmLRT(fit, contrast = contrasts[, i])
- } else {
- res <- glmQLFTest(fit, contrast = contrasts[, i])
- }
+ if (want_lrt) {
+ res <- glmLRT(fit, contrast = contrasts[, i])
+ } else {
+ res <- glmQLFTest(fit, contrast = contrasts[, i])
+ }
- status <- decideTestsDGE(res,
- adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
- lfc = opt$lfcReq
- )
- sum_status <- summary(status)
+ status <- decideTestsDGE(res,
+ adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
+ lfc = opt$lfcReq
+ )
+ sum_status <- summary(status)
- # Collect counts for differential expression
- up_count[i] <- sum_status["Up", ]
- down_count[i] <- sum_status["Down", ]
- flat_count[i] <- sum_status["NotSig", ]
+ # Collect counts for differential expression
+ up_count[i] <- sum_status["Up", ]
+ down_count[i] <- sum_status["Down", ]
+ flat_count[i] <- sum_status["NotSig", ]
- # Write top expressions table
- top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
- write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
+ # Write top expressions table
+ top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
+ write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
- link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
- link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
- link_data <- rbind(link_data, c(link_name, link_addr))
+ link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
+ link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
+ link_data <- rbind(link_data, c(link_name, link_addr))
- # Plot MD (log ratios vs mean difference) using limma package
- pdf(md_pdf[i])
- limma::plotMD(res,
- status = status,
- main = paste("MD Plot:", unmake_names(contrast_data[i])),
- hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
- xlab = "Average Expression", ylab = "logFC"
- )
+ # Plot MD (log ratios vs mean difference) using limma package
+ pdf(md_pdf[i])
+ limma::plotMD(res,
+ status = status,
+ main = paste("MD Plot:", unmake_names(contrast_data[i])),
+ hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
+ xlab = "Average Expression", ylab = "logFC"
+ )
- abline(h = 0, col = "grey", lty = 2)
+ abline(h = 0, col = "grey", lty = 2)
- link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
- link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
- link_data <- rbind(link_data, c(link_name, link_addr))
- invisible(dev.off())
+ link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
+ link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
+ link_data <- rbind(link_data, c(link_name, link_addr))
+ invisible(dev.off())
- png(md_png[i], height = 600, width = 600)
- limma::plotMD(res,
- status = status,
- main = paste("MD Plot:", unmake_names(contrast_data[i])),
- hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
- xlab = "Average Expression", ylab = "logFC"
- )
+ png(md_png[i], height = 600, width = 600)
+ limma::plotMD(res,
+ status = status,
+ main = paste("MD Plot:", unmake_names(contrast_data[i])),
+ hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
+ xlab = "Average Expression", ylab = "logFC"
+ )
- abline(h = 0, col = "grey", lty = 2)
+ abline(h = 0, col = "grey", lty = 2)
- img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
- img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
- image_data <- rbind(image_data, c(img_name, img_addr))
- invisible(dev.off())
+ img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
+ img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
+ image_data <- rbind(image_data, c(img_name, img_addr))
+ invisible(dev.off())
}
sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count)
row.names(sig_diff) <- contrast_data
# Save relevant items as rda object
if (want_rda) {
- if (want_norm) {
- save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
- file = rda_out, ascii = TRUE
- )
- } else {
- save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
- file = rda_out, ascii = TRUE
- )
- }
- link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
+ if (want_norm) {
+ save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
+ file = rda_out, ascii = TRUE
+ )
+ } else {
+ save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
+ file = rda_out, ascii = TRUE
+ )
+ }
+ link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
}
# Record session info
@@ -632,7 +633,7 @@
html_image(image_data$Link[1], image_data$Label[1])
for (i in 2:nrow(image_data)) {
- html_image(image_data$Link[i], image_data$Label[i])
+ html_image(image_data$Link[i], image_data$Label[i])
}
cata("Differential Expression Counts:
\n")
@@ -641,40 +642,40 @@
cata("\n")
table_item()
for (i in colnames(sig_diff)) {
- table_head_item(i)
+ table_head_item(i)
}
cata("
\n")
for (i in seq_len(nrow(sig_diff))) {
- cata("\n")
- table_head_item(unmake_names(row.names(sig_diff)[i]))
- for (j in seq_len(ncol(sig_diff))) {
- table_item(as.character(sig_diff[i, j]))
- }
- cata("
\n")
+ cata("\n")
+ table_head_item(unmake_names(row.names(sig_diff)[i]))
+ for (j in seq_len(ncol(sig_diff))) {
+ table_item(as.character(sig_diff[i, j]))
+ }
+ cata("
\n")
}
cata("")
cata("Plots:
\n")
for (i in seq_len(nrow(link_data))) {
- if (grepl(".pdf", link_data$Link[i])) {
- html_link(link_data$Link[i], link_data$Label[i])
- }
+ if (grepl(".pdf", link_data$Link[i])) {
+ html_link(link_data$Link[i], link_data$Label[i])
+ }
}
cata("Tables:
\n")
for (i in seq_len(nrow(link_data))) {
- if (grepl(".tsv", link_data$Link[i])) {
- html_link(link_data$Link[i], link_data$Label[i])
- }
+ if (grepl(".tsv", link_data$Link[i])) {
+ html_link(link_data$Link[i], link_data$Label[i])
+ }
}
if (want_rda) {
- cata("R Data Objects:
\n")
- for (i in seq_len(nrow(link_data))) {
- if (grepl(".RData", link_data$Link[i])) {
- html_link(link_data$Link[i], link_data$Label[i])
+ cata("R Data Objects:
\n")
+ for (i in seq_len(nrow(link_data))) {
+ if (grepl(".RData", link_data$Link[i])) {
+ html_link(link_data$Link[i], link_data$Label[i])
+ }
}
- }
}
cata("Alt-click links to download file.
\n")
@@ -686,68 +687,68 @@
cata("\n")
if (filt_cpm || filt_smpcount || filt_totcount) {
- if (filt_cpm) {
- temp_str <- paste(
- "Genes without more than", opt$cpmReq,
- "CPM in at least", opt$sampleReq, "samples are insignificant",
- "and filtered out."
- )
- } else if (filt_smpcount) {
- temp_str <- paste(
- "Genes without more than", opt$cntReq,
- "counts in at least", opt$sampleReq, "samples are insignificant",
- "and filtered out."
+ if (filt_cpm) {
+ temp_str <- paste(
+ "Genes without more than", opt$cpmReq,
+ "CPM in at least", opt$sampleReq, "samples are insignificant",
+ "and filtered out."
+ )
+ } else if (filt_smpcount) {
+ temp_str <- paste(
+ "Genes without more than", opt$cntReq,
+ "counts in at least", opt$sampleReq, "samples are insignificant",
+ "and filtered out."
+ )
+ } else if (filt_totcount) {
+ temp_str <- paste(
+ "Genes without more than", opt$cntReq,
+ "counts, after summing counts for all samples, are insignificant",
+ "and filtered out."
+ )
+ }
+
+ list_item(temp_str)
+ filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
+ temp_str <- paste0(
+ filtered_count, " of ", prefilter_count, " (", filter_prop,
+ "%) genes were filtered out for low expression."
)
- } else if (filt_totcount) {
- temp_str <- paste(
- "Genes without more than", opt$cntReq,
- "counts, after summing counts for all samples, are insignificant",
- "and filtered out."
- )
- }
-
- list_item(temp_str)
- filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
- temp_str <- paste0(
- filtered_count, " of ", prefilter_count, " (", filter_prop,
- "%) genes were filtered out for low expression."
- )
- list_item(temp_str)
+ list_item(temp_str)
}
list_item(opt$normOpt, " was the method used to normalise library sizes.")
if (want_lrt) {
- list_item("The edgeR likelihood ratio test was used.")
+ list_item("The edgeR likelihood ratio test was used.")
} else {
- if (want_robust) {
- list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
- } else {
- list_item("The edgeR quasi-likelihood test was used.")
- }
+ if (want_robust) {
+ list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
+ } else {
+ list_item("The edgeR quasi-likelihood test was used.")
+ }
}
if (opt$pAdjOpt != "none") {
- if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
+ if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
+ temp_str <- paste0(
+ "MD-Plot highlighted genes are significant at FDR ",
+ "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+ "least ", opt$lfcReq, "."
+ )
+ list_item(temp_str)
+ } else if (opt$pAdjOpt == "holm") {
+ temp_str <- paste0(
+ "MD-Plot highlighted genes are significant at adjusted ",
+ "p-value of ", opt$pValReq, " by the Holm(1979) ",
+ "method, and exhibit log2-fold-change of at least ",
+ opt$lfcReq, "."
+ )
+ list_item(temp_str)
+ }
+} else {
temp_str <- paste0(
- "MD-Plot highlighted genes are significant at FDR ",
- "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
- "least ", opt$lfcReq, "."
+ "MD-Plot highlighted genes are significant at p-value ",
+ "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+ "least ", opt$lfcReq, "."
)
list_item(temp_str)
- } else if (opt$pAdjOpt == "holm") {
- temp_str <- paste0(
- "MD-Plot highlighted genes are significant at adjusted ",
- "p-value of ", opt$pValReq, " by the Holm(1979) ",
- "method, and exhibit log2-fold-change of at least ",
- opt$lfcReq, "."
- )
- list_item(temp_str)
- }
-} else {
- temp_str <- paste0(
- "MD-Plot highlighted genes are significant at p-value ",
- "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
- "least ", opt$lfcReq, "."
- )
- list_item(temp_str)
}
cata("
\n")
@@ -761,26 +762,26 @@
table_head_item(names(factors)[1], " (Primary Factor)")
if (ncol(factors) > 1) {
- for (i in names(factors)[2:length(names(factors))]) {
- table_head_item(i)
- }
- cata("\n")
+ for (i in names(factors)[2:length(names(factors))]) {
+ table_head_item(i)
+ }
+ cata("\n")
}
for (i in seq_len(nrow((factors)))) {
- cata("\n")
- table_head_item(row.names(factors)[i])
- for (j in seq_len(ncol(factors))) {
- table_item(as.character(unmake_names(factors[i, j])))
- }
- cata("
\n")
+ cata("\n")
+ table_head_item(row.names(factors)[i])
+ for (j in seq_len(ncol(factors))) {
+ table_item(as.character(unmake_names(factors[i, j])))
+ }
+ cata("
\n")
}
cata("")
for (i in seq_len(nrow(link_data))) {
- if (grepl("session_info", link_data$Link[i])) {
- html_link(link_data$Link[i], link_data$Label[i])
- }
+ if (grepl("session_info", link_data$Link[i])) {
+ html_link(link_data$Link[i], link_data$Label[i])
+ }
}
cata("\n")
diff -r 5bf899c13979 -r ae2aad0a6d50 edger.xml
--- a/edger.xml Wed Nov 22 03:57:37 2023 +0000
+++ b/edger.xml Wed Sep 04 15:50:01 2024 +0000
@@ -4,7 +4,7 @@
3.36.0
- 4
+ 5
topic_3308
@@ -130,11 +130,8 @@
-
-
-
-
-
+
+ ^[\w]+$